{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据读取\n",
    "\n",
    "|数据类型|读取方法|\n",
    "|--|--|\n",
    "|csv、tsv、txt|pd.read_csv|\n",
    "|excel|pd.read_excel|"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "fpath = r'D:\\Python\\Code\\Study2022\\self-study-package\\data.csv'\n",
    "f2path = r'D:\\Python\\Code\\Study2022\\self-study-package\\data.txt'\n",
    "f3path = r'D:\\Python\\Code\\Study2022\\self-study-package\\data.xlsx'\n",
    "\n",
    "data_pd = pd.read_csv(fpath) #读取数据\n",
    "data2_pd = pd.read_csv(\n",
    "    f2path,\n",
    "    sep='/', #设置分隔符\n",
    "    header=None, #没有标题行\n",
    "    names=['data','ID','height'],\n",
    ")\n",
    "data3_pd = pd.read_excel(f3path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "0       1  zhangsan  169.466826  90.407131\n",
       "1       2      lisi  168.671984  93.680979\n",
       "2       3    wangwu  165.335473  91.600758\n",
       "3       4   zhaoliu  163.528142  95.742389\n",
       "4       5    erkang  169.912478  99.317983"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.head()  #读取前几行   \n",
    "# 最上面一行叫：columns\n",
    "# 最左边一列叫：index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 4)\n",
      "Index(['NO.num', 'name', 'height', 'weight'], dtype='object')\n",
      "RangeIndex(start=0, stop=10, step=1)\n",
      "NO.num      int64\n",
      "name       object\n",
      "height    float64\n",
      "weight    float64\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(data_pd.shape) #返回 (行数，列数) 不带标题行与index列\n",
    "print(data_pd.columns) #查看列名\n",
    "print(data_pd.index) #查看index\n",
    "print(data_pd.dtypes) #查看数据类型\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data</th>\n",
       "      <th>ID</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2012-01-02</td>\n",
       "      <td>4112365</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012-01-03</td>\n",
       "      <td>135235</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012-01-04</td>\n",
       "      <td>5151</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2012-01-05</td>\n",
       "      <td>8155</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2012-01-06</td>\n",
       "      <td>58513</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data       ID  height\n",
       "0  2012-01-02  4112365     162\n",
       "1  2012-01-03   135235     167\n",
       "2  2012-01-04     5151     162\n",
       "3  2012-01-05     8155     167\n",
       "4  2012-01-06    58513     162"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>id</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>xiaohuo</td>\n",
       "      <td>1</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>zoom</td>\n",
       "      <td>2</td>\n",
       "      <td>79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>gongjue</td>\n",
       "      <td>3</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>theshy</td>\n",
       "      <td>4</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>jkl</td>\n",
       "      <td>5</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>gala</td>\n",
       "      <td>6</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>xiaoheiban</td>\n",
       "      <td>7</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         name  id  weight\n",
       "0     xiaohuo   1      80\n",
       "1        zoom   2      79\n",
       "2     gongjue   3      65\n",
       "3      theshy   4      75\n",
       "4         jkl   5      65\n",
       "5        gala   6      68\n",
       "6  xiaoheiban   7      60"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data3_pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### pandas 数据结构\n",
    "|类型|描述|\n",
    "|--|--|\n",
    "|DataFrame|二维数据，整个表格，多行多列|\n",
    "|Series|一维数据，一行或一列|"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name      xiaohong\n",
       "height         162\n",
       "dtype: object"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### 创建一个series\n",
    "s1 = pd.Series(['xiaohong','162','78'],index=['name','height','weight'])\n",
    "s1\n",
    "s1['name'] #通过index访问值\n",
    "s1[['name','height']] #返回的是series 对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>xiaowang</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>xiaoli</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>xiaoqing</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       name  weight\n",
       "0  xiaowang      85\n",
       "1    xiaoli      65\n",
       "2  xiaoqing      75"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "###通过多个字典序列创建dataframe\n",
    "data001 ={\n",
    "    'name':['xiaowang','xiaoli','xiaoqing'],\n",
    "    'weight':[85,65,75],\n",
    "    'height':[162,156,186]\n",
    "}\n",
    "\n",
    "df1 = pd.DataFrame(data001) #将多个字典合并成dataframe\n",
    "df1['name']  ## 查询一列 返回 series 对象\n",
    "df1[['name','weight']]  ##查询多列 返回 dataframe 对象\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>weight</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>xiaowang</td>\n",
       "      <td>85</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>xiaoli</td>\n",
       "      <td>65</td>\n",
       "      <td>156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>xiaoqing</td>\n",
       "      <td>75</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       name  weight  height\n",
       "0  xiaowang      85     162\n",
       "1    xiaoli      65     156\n",
       "2  xiaoqing      75     186"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.loc[1] # 查询一行，返回 series\n",
    "df1.loc[0:3] #查询多行 返回 dataframe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### pandas 查询数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    xiaowang\n",
       "1      xiaoli\n",
       "2    xiaoqing\n",
       "Name: name, dtype: object"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.loc[0:2,'name'] #查询前三行 ，name列 返回 df 类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>xiaowang</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>xiaoli</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>xiaoqing</td>\n",
       "      <td>75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       name  weight\n",
       "0  xiaowang      85\n",
       "1    xiaoli      65\n",
       "2  xiaoqing      75"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.loc[0:3,'name':'weight'] #同时查找 前三行 和 前两列 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 条件表达式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>ziwei</td>\n",
       "      <td>167.861595</td>\n",
       "      <td>95.817151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>xiaozhao</td>\n",
       "      <td>165.786184</td>\n",
       "      <td>93.826696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>xiaoli</td>\n",
       "      <td>168.796341</td>\n",
       "      <td>98.723368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>zhaozhao</td>\n",
       "      <td>163.623267</td>\n",
       "      <td>91.402040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>aoen</td>\n",
       "      <td>161.005858</td>\n",
       "      <td>98.991811</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "0       1  zhangsan  169.466826  90.407131\n",
       "1       2      lisi  168.671984  93.680979\n",
       "2       3    wangwu  165.335473  91.600758\n",
       "3       4   zhaoliu  163.528142  95.742389\n",
       "4       5    erkang  169.912478  99.317983\n",
       "5       6     ziwei  167.861595  95.817151\n",
       "6       7  xiaozhao  165.786184  93.826696\n",
       "7       8    xiaoli  168.796341  98.723368\n",
       "8       9  zhaozhao  163.623267  91.402040\n",
       "9      10      aoen  161.005858  98.991811"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>xiaozhao</td>\n",
       "      <td>165.786184</td>\n",
       "      <td>93.826696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>zhaozhao</td>\n",
       "      <td>163.623267</td>\n",
       "      <td>91.402040</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "0       1  zhangsan  169.466826  90.407131\n",
       "1       2      lisi  168.671984  93.680979\n",
       "2       3    wangwu  165.335473  91.600758\n",
       "6       7  xiaozhao  165.786184  93.826696\n",
       "8       9  zhaozhao  163.623267  91.402040"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.loc[data_pd['weight']<95,:] #查询所有列中 ， 体重小于95的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "0       1  zhangsan  169.466826  90.407131\n",
       "1       2      lisi  168.671984  93.680979"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.loc[(data_pd['weight']<95) & (data_pd['height']>166),:]  ##多个条件"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### pandas  修改/添加 数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data</th>\n",
       "      <th>ID</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2012-01-02</td>\n",
       "      <td>4112365</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012-01-03</td>\n",
       "      <td>135235</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012-01-04</td>\n",
       "      <td>5151</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2012-01-05</td>\n",
       "      <td>8155</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2012-01-06</td>\n",
       "      <td>58513</td>\n",
       "      <td>162</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data       ID  height\n",
       "0  2012-01-02  4112365     162\n",
       "1  2012-01-03   135235     167\n",
       "2  2012-01-04     5151     162\n",
       "3  2012-01-05     8155     167\n",
       "4  2012-01-06    58513     162"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 新增列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>data</th>\n",
       "      <th>ID</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2012-01-02</td>\n",
       "      <td>4112365</td>\n",
       "      <td>162</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012-01-03</td>\n",
       "      <td>135235</td>\n",
       "      <td>167</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012-01-04</td>\n",
       "      <td>5151</td>\n",
       "      <td>162</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2012-01-05</td>\n",
       "      <td>8155</td>\n",
       "      <td>167</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2012-01-06</td>\n",
       "      <td>58513</td>\n",
       "      <td>162</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         data       ID  height  weight\n",
       "0  2012-01-02  4112365     162      80\n",
       "1  2012-01-03   135235     167      80\n",
       "2  2012-01-04     5151     162      80\n",
       "3  2012-01-05     8155     167      80\n",
       "4  2012-01-06    58513     162      80"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd.loc[:,'weight'] = 80\n",
    "data2_pd.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "162    4\n",
       "167    4\n",
       "Name: height, dtype: int64"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd['height'].value_counts() ##对每个类型 进行计数统计"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据统计函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>8.000000</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>164.500000</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.672612</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>162.000000</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>162.000000</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>164.500000</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>167.000000</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>167.000000</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           height  weight\n",
       "count    8.000000     8.0\n",
       "mean   164.500000    80.0\n",
       "std      2.672612     0.0\n",
       "min    162.000000    80.0\n",
       "25%    162.000000    80.0\n",
       "50%    164.500000    80.0\n",
       "75%    167.000000    80.0\n",
       "max    167.000000    80.0"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd.describe() #描述性统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([162, 167], dtype=int64)"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd['height'].unique() #去重统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "162    4\n",
       "167    4\n",
       "Name: height, dtype: int64"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data2_pd['height'].value_counts() ##对每个类型 进行计数统计"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 保存数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "data2_pd\n",
    "data2_pd.to_excel(r'D:\\Python\\Code\\Study2022\\self-study-package\\data_gai.xlsx',index=False) \n",
    "## 不保存第一列 index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "0       1  zhangsan  169.466826  90.407131\n",
       "1       2      lisi  168.671984  93.680979\n",
       "2       3    wangwu  165.335473  91.600758\n",
       "3       4   zhaoliu  163.528142  95.742389\n",
       "4       5    erkang  169.912478  99.317983"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>zhaozhao</td>\n",
       "      <td>163.623267</td>\n",
       "      <td>91.402040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>xiaozhao</td>\n",
       "      <td>165.786184</td>\n",
       "      <td>93.826696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>ziwei</td>\n",
       "      <td>167.861595</td>\n",
       "      <td>95.817151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>xiaoli</td>\n",
       "      <td>168.796341</td>\n",
       "      <td>98.723368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>aoen</td>\n",
       "      <td>161.005858</td>\n",
       "      <td>98.991811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "0       1  zhangsan  169.466826  90.407131\n",
       "8       9  zhaozhao  163.623267  91.402040\n",
       "2       3    wangwu  165.335473  91.600758\n",
       "1       2      lisi  168.671984  93.680979\n",
       "6       7  xiaozhao  165.786184  93.826696\n",
       "3       4   zhaoliu  163.528142  95.742389\n",
       "5       6     ziwei  167.861595  95.817151\n",
       "7       8    xiaoli  168.796341  98.723368\n",
       "9      10      aoen  161.005858  98.991811\n",
       "4       5    erkang  169.912478  99.317983"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.sort_values(by='weight') # 默认升序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>aoen</td>\n",
       "      <td>161.005858</td>\n",
       "      <td>98.991811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>xiaoli</td>\n",
       "      <td>168.796341</td>\n",
       "      <td>98.723368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>ziwei</td>\n",
       "      <td>167.861595</td>\n",
       "      <td>95.817151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>xiaozhao</td>\n",
       "      <td>165.786184</td>\n",
       "      <td>93.826696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>zhaozhao</td>\n",
       "      <td>163.623267</td>\n",
       "      <td>91.402040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight\n",
       "4       5    erkang  169.912478  99.317983\n",
       "9      10      aoen  161.005858  98.991811\n",
       "7       8    xiaoli  168.796341  98.723368\n",
       "5       6     ziwei  167.861595  95.817151\n",
       "3       4   zhaoliu  163.528142  95.742389\n",
       "6       7  xiaozhao  165.786184  93.826696\n",
       "1       2      lisi  168.671984  93.680979\n",
       "2       3    wangwu  165.335473  91.600758\n",
       "8       9  zhaozhao  163.623267  91.402040\n",
       "0       1  zhangsan  169.466826  90.407131"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.sort_values(by='weight',ascending=False) # 降序排列 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 字符串处理 \n",
    "#### 只能在 series 使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "      <th>sex</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>ziwei</td>\n",
       "      <td>167.861595</td>\n",
       "      <td>95.817151</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>xiaozhao</td>\n",
       "      <td>165.786184</td>\n",
       "      <td>93.826696</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>xiaoli</td>\n",
       "      <td>168.796341</td>\n",
       "      <td>98.723368</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>zhaozhao</td>\n",
       "      <td>163.623267</td>\n",
       "      <td>91.402040</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>aoen</td>\n",
       "      <td>161.005858</td>\n",
       "      <td>98.991811</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight     sex\n",
       "0       1  zhangsan  169.466826  90.407131  female\n",
       "1       2      lisi  168.671984  93.680979  female\n",
       "2       3    wangwu  165.335473  91.600758  female\n",
       "3       4   zhaoliu  163.528142  95.742389  female\n",
       "4       5    erkang  169.912478  99.317983  female\n",
       "5       6     ziwei  167.861595  95.817151    male\n",
       "6       7  xiaozhao  165.786184  93.826696    male\n",
       "7       8    xiaoli  168.796341  98.723368    male\n",
       "8       9  zhaozhao  163.623267  91.402040    male\n",
       "9      10      aoen  161.005858  98.991811    male"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd.loc[0:5,'sex'] = '0'\n",
    "data_pd.loc[5:,'sex'] = '1'\n",
    "data_pd.loc[0:5,'sex'] = data_pd['sex'].str.replace('0','female') #直接replace 不会修改 原数据表格\n",
    "data_pd.loc[5:,'sex'] = data_pd['sex'].str.replace('1','male')\n",
    "data_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NO.num</th>\n",
       "      <th>name</th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "      <th>sex</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>zhangsan</td>\n",
       "      <td>169.466826</td>\n",
       "      <td>90.407131</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>lisi</td>\n",
       "      <td>168.671984</td>\n",
       "      <td>93.680979</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>wangwu</td>\n",
       "      <td>165.335473</td>\n",
       "      <td>91.600758</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>zhaoliu</td>\n",
       "      <td>163.528142</td>\n",
       "      <td>95.742389</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>erkang</td>\n",
       "      <td>169.912478</td>\n",
       "      <td>99.317983</td>\n",
       "      <td>female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>ziwei</td>\n",
       "      <td>167.861595</td>\n",
       "      <td>95.817151</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>xiaozhao</td>\n",
       "      <td>165.786184</td>\n",
       "      <td>93.826696</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>xiaoli</td>\n",
       "      <td>168.796341</td>\n",
       "      <td>98.723368</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>zhaozhao</td>\n",
       "      <td>163.623267</td>\n",
       "      <td>91.402040</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>aoen</td>\n",
       "      <td>161.005858</td>\n",
       "      <td>98.991811</td>\n",
       "      <td>male</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NO.num      name      height     weight     sex\n",
       "0       1  zhangsan  169.466826  90.407131  female\n",
       "1       2      lisi  168.671984  93.680979  female\n",
       "2       3    wangwu  165.335473  91.600758  female\n",
       "3       4   zhaoliu  163.528142  95.742389  female\n",
       "4       5    erkang  169.912478  99.317983  female\n",
       "5       6     ziwei  167.861595  95.817151    male\n",
       "6       7  xiaozhao  165.786184  93.826696    male\n",
       "7       8    xiaoli  168.796341  98.723368    male\n",
       "8       9  zhaozhao  163.623267  91.402040    male\n",
       "9      10      aoen  161.005858  98.991811    male"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 删除列、行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0 0 2 1]\n",
      " [2 9 8 8]\n",
      " [7 9 1 8]]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c1</th>\n",
       "      <th>c2</th>\n",
       "      <th>c3</th>\n",
       "      <th>c4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   c1  c2  c3  c4\n",
       "0   0   0   2   1\n",
       "1   2   9   8   8\n",
       "2   7   9   1   8"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A = np.random.randint(0,10,12)\n",
    "A = A.reshape(3,4)\n",
    "print(A)\n",
    "A_np = pd.DataFrame(A,columns=['c1','c2','c3','c4'])\n",
    "A_np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c1</th>\n",
       "      <th>c2</th>\n",
       "      <th>c3</th>\n",
       "      <th>c4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   c1  c2  c3  c4\n",
       "0   0   0   2   1\n",
       "1   2   9   8   8\n",
       "2   7   9   1   8"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_np.drop('c1',axis=1) # 删除 列 但是A_np 没有改变哦 和numpy的0 1 相反"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c1</th>\n",
       "      <th>c2</th>\n",
       "      <th>c3</th>\n",
       "      <th>c4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   c1  c2  c3  c4\n",
       "0   0   0   2   1\n",
       "2   7   9   1   8"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_np.drop(1,axis=0) # 删除 行 但是A_np 没有改变哦"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 其他函数\n",
    "|函数|用途|参数|\n",
    "|--|--|--|\n",
    "|df.set_index('id',inplace = True)|将第一列作为index|drop=False 将索引列还保存在col中|\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "4ce0e62306dd6a5716965d4519ada776f947e6dfc145b604b11307c10277ef29"
  },
  "kernelspec": {
   "display_name": "Python 3.9.6 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
